/*******************************************************************************
																				
	DESCRIPTION: 	This do file creates crosswalks for industry codes.
					
*******************************************************************************/

clear all
global id_code 001_2

/******************************
*	1. Setup
******************************/

* Load the table that maps 1992 codes to 2002 codes
import delimited "${data}/SNI_2002_92", clear

* Naming convention used throughout: *_old is the code being translated,
* *_new is the 2002 code it is translated into
rename (sni2002 sni92) (SNI92_new SNI92_old)

* Blank out the variable labels
foreach v of varlist SNI92_new SNI92_old {
	label variable `v' ""
}

* Keep only rows that actually carry a 1992 code
keep if SNI92_old != .

* Empty placeholders for remappings of 2002 codes onto other 2002 codes
foreach v in SNI2002_old SNI2002_new {
	generate `v' = .
}

* Load the table that maps 2007 codes to 2002 codes in a side session,
* park it in a temporary file, and stack it underneath the 1992 rows
tempfile xwalk_2007
preserve
	import delimited "${data}/SNI_2002_2007", clear
	rename (sni2002 sni2007) (SNI2007_new SNI2007_old)
	foreach v of varlist SNI2007_new SNI2007_old {
		label variable `v' ""
	}
	save `xwalk_2007'
restore

append using `xwalk_2007'

/******************************
*	2. Harmonise 2007 codes
******************************/

* What we have so far: translations of 1992 and 2007 codes into 2002 codes. 2002
* codes are kept as they are.
* Problem: some 1992 and 2007 codes translate into multiple 2002 codes.
* Approach: whenever a 2007 code appears in more than one row, the row reached
* first by the outer loop (row i) is kept. The 2002 code of every other row
* (row j) is merged into the 2002 code of row i: the merge is recorded as a
* 2002 -> 2002 remapping in row j, every translation pointing at the merged
* code is redirected, and the 2007 translation in row j is blanked out.

local n = _N

* stable: keep tied rows in their current relative order so that repeated runs
* process the rows in the same sequence
sort SNI2007_old SNI2007_new, stable

* Temporary scalars holding the two 2002 codes involved in a merge.
* They must be snapshotted BEFORE any replace: replace works through the data
* one observation at a time, so an expression such as SNI2007_new[`j'] changes
* its value as soon as row j itself has been overwritten. Reading the subscript
* inside the replace would leave rows after j (and all 1992 rows) unredirected.
tempname keep_code drop_code

* Loop i through all translations
forval i = 1/`n' {
	
	if SNI2007_old[`i']!=. {
	
		* Loop j through all translations
		forval j = 1/`n' {
			
			* Is the same 2007 code (old_i = old_j) present in another row,
			* i.e. does it have a second translation (new_j)?
			if SNI2007_old[`i']==SNI2007_old[`j'] & `i'!=`j' {
				
				* If yes...
				
				* Freeze new_i and new_j before anything is modified
				scalar `keep_code' = SNI2007_new[`i']
				scalar `drop_code' = SNI2007_new[`j']
				
				* For 2002 codes, record that new_j is now translated to new_i
				replace SNI2002_old = scalar(`drop_code') in `j'
				replace SNI2002_new = scalar(`keep_code') in `j'
				
				* For 2007 and 1992 codes, set any code that was being translated into 
				* new_j to now be translated into new_i
				replace SNI2007_new = scalar(`keep_code') if SNI2007_new==scalar(`drop_code')
				replace SNI92_new = scalar(`keep_code') if SNI92_new==scalar(`drop_code')
				
				* Remove the translation of old_j to new_j
				replace SNI2007_old = . in `j'
				replace SNI2007_new = . in `j'	
				
			} 
			
		}
	
	}
	
}

* Drop duplicates (only among rows that carry the respective code)
duplicates drop SNI92_old SNI92_new if SNI92_old!=., force
duplicates drop SNI2002_old SNI2002_new if SNI2002_old!=., force
duplicates drop SNI2007_old SNI2007_new if SNI2007_old!=., force

/******************************
*	3. Harmonise 1992 codes
******************************/

* Same procedure as for the 2007 codes, now applied to the 1992 codes: when a
* 1992 code appears in more than one row, the row reached first by the outer
* loop (row i) is kept, the 2002 code of every other row (row j) is merged into
* the 2002 code of row i, and the 1992 translation in row j is blanked out.

local n = _N

* stable: keep tied rows in their current relative order so that repeated runs
* process the rows in the same sequence
sort SNI92_old SNI92_new, stable

* Temporary scalars holding the two 2002 codes involved in a merge.
* They are snapshotted before any replace because replace works through the
* data one observation at a time: once row j of SNI92_new has been overwritten,
* SNI92_new[`j'] no longer holds new_j and rows after j would not be redirected.
tempname keep_code drop_code

* Loop i through all translations
forval i = 1/`n' {
	
	if SNI92_old[`i']!=. {
	
		* Loop j through all translations
		forval j = 1/`n' {
			
			* Is the same 1992 code (old_i = old_j) present in another row,
			* i.e. does it have a second translation (new_j)?
			if SNI92_old[`i']==SNI92_old[`j'] & `i'!=`j' {
				
				* If yes...
				
				* Freeze new_i and new_j before anything is modified
				scalar `keep_code' = SNI92_new[`i']
				scalar `drop_code' = SNI92_new[`j']
				
				* For 2002 codes, record that new_j is now translated to new_i
				replace SNI2002_old = scalar(`drop_code') in `j'
				replace SNI2002_new = scalar(`keep_code') in `j'
				
				* For 2007 and 1992 codes, set any code that was being translated into 
				* new_j to now be translated into new_i
				replace SNI2007_new = scalar(`keep_code') if SNI2007_new==scalar(`drop_code')
				replace SNI92_new = scalar(`keep_code') if SNI92_new==scalar(`drop_code')
				
				* Remove the translation of old_j to new_j
				replace SNI92_old = . in `j'
				replace SNI92_new = . in `j'	
				
			} 
			
		}
	
	}
	
}

* Drop duplicates (only among rows that carry the respective code)
duplicates drop SNI92_old SNI92_new if SNI92_old!=., force
duplicates drop SNI2002_old SNI2002_new if SNI2002_old!=., force
duplicates drop SNI2007_old SNI2007_new if SNI2007_old!=., force

/******************************
*	4. Harmonise 2002 translations
******************************/

* The previous steps recorded remappings of 2002 codes onto other 2002 codes.
* The same 2002 code may have ended up with more than one target. For every
* such code the row met first is kept; each further row has its target merged
* into the kept target and is then blanked out.

local n_rows = _N

* Outer loop: candidate row whose remapping is kept
forvalues keep_row = 1/`n_rows' {
	
	* Rows without a 2002 remapping cannot serve as the reference row
	if SNI2002_old[`keep_row']==. {
		continue
	}
	
	* Inner loop: look for other rows remapping the same 2002 code
	forvalues other_row = 1/`n_rows' {
		
		* Skip the reference row itself and rows about a different code
		if `keep_row'==`other_row' | SNI2002_old[`keep_row']!=SNI2002_old[`other_row'] {
			continue
		}
		
		* Point every 1992 and 2007 translation that targets the other row's
		* 2002 code at the reference row's 2002 code instead
		replace SNI92_new = SNI2002_new[`keep_row'] if SNI92_new==SNI2002_new[`other_row']
		replace SNI2007_new = SNI2002_new[`keep_row'] if SNI2007_new==SNI2002_new[`other_row']
		
		* Discard the redundant remapping
		replace SNI2002_old = . in `other_row'
		replace SNI2002_new = . in `other_row'	
		
	}
	
}

* Drop duplicate translations within each code vintage, looking only at rows
* that carry a code of that vintage
foreach vintage in 92 2002 2007 {
	duplicates drop SNI`vintage'_old SNI`vintage'_new if SNI`vintage'_old!=., force
}

/******************************
*	5. Export translations
******************************/

* 1992 crosswalk: work on a copy of the data and restore it afterwards
preserve

keep if SNI92_old!=.
keep SNI92_old SNI92_new

* Append three hand-made translations, all pointing at code 0: 
* 0, 1250 and 2010. (1250 and 2010 are not present in the official
* translation. I assume they are simply taken out of use in 2002.)
local first_added = _N + 1
set obs `=_N + 3'

* All added rows share the same target code
replace SNI92_new = 0 in `first_added'/l

* Fill in the source codes, one added row each
local row = `first_added'
foreach code in 0 1250 2010 {
	replace SNI92_old = `code' in `row'
	local ++row
}

save "${data}/${id_code}_SNI92_new.dta", replace

restore

* 2002 crosswalk: only the rows holding a 2002 -> 2002 remapping are written
preserve
	keep if SNI2002_old!=.
	keep SNI2002_old SNI2002_new
	save "${data}/${id_code}_SNI2002_new.dta", replace
restore

* 2007 crosswalk: work on a copy of the data and restore it afterwards
preserve
keep if SNI2007_old!=.
keep SNI2007_old SNI2007_new

* Append two hand-made translations, both pointing at code 0: 0 and 69210
local first_added = _N + 1
set obs `=_N + 2'

* Both added rows share the same target code
replace SNI2007_new = 0 in `first_added'/l

* Fill in the source codes, one added row each
local row = `first_added'
foreach code in 0 69210 {
	replace SNI2007_old = `code' in `row'
	local ++row
}

save "${data}/${id_code}_SNI2007_new.dta", replace
restore